Challenge 9: Baby Names - Tables

Author

Janalyn Lao

Setup

Code
library(tidyverse)
library(here)
library(knitr)
library(kableExtra)
library(DT)

Question 1

Code
# Read the state-level baby-name records from the project's
# supporting_artifacts/learning_targets folder and rename the `Gender`
# column to `Sex_at_Birth` for the rest of the analysis.
a_names <- here::here(
  "supporting_artifacts",
  "learning_targets",
  "StateNames_A.csv"
) |>
  read_csv() |>
  rename(Sex_at_Birth = Gender)

# Interactive table with per-column filters and editable cells.
# The display labels are applied by position, so they assume the file's
# columns arrive in the order name, year, sex, state, count -- confirm
# against the CSV header if the data source changes.
a_names |>
  datatable(
    colnames = c(
      "Baby Names",
      "Year Observed",
      "Sex Assigned at Birth",
      "State Observed",
      "Count of Babies with Name"
    ),
    class = "cell-border stripe",
    caption = "Table of Baby Names that Start With the Letter 'A'",
    filter = "top",
    editable = "cell"
  )

Question 2

Code
# Total number of babies named "Allison" per state, with one column per
# sex assigned at birth.
allison_both_sexes <- a_names |>
  filter(Name == "Allison") |>
  group_by(Sex_at_Birth, State) |>
  # Drop the grouping explicitly so the result is a plain tibble and
  # summarize() does not emit its regrouping message.
  summarize(sex_count = sum(Count), .groups = "drop") |>
  # A state with no records for one sex gets 0 instead of NA.
  pivot_wider(names_from = Sex_at_Birth,
              values_from = sex_count,
              values_fill = 0)

# Header labels are positional: state first, then the pivoted sex columns
# (presumably "F" then "M" -- verify if the sex coding ever changes).
kable(allison_both_sexes,
      format = "pipe",
      col.names = c("State Observed",
                    "Count of Female Babies",
                    "Count of Male Babies"),
      align = "c",
      caption = "Count of Female and Male Babies Named 'Allison' Per State")
Count of Female and Male Babies Named ‘Allison’ Per State
State Observed Count of Female Babies Count of Male Babies
AK 232 0
AL 1535 0
AR 1198 0
AZ 1880 0
CA 12413 0
CO 1594 0
CT 1099 0
DC 321 0
DE 294 0
FL 4455 0
GA 3257 0
HI 183 0
IA 1477 0
ID 451 0
IL 5110 0
IN 3067 0
KS 1283 0
KY 1905 20
LA 1209 0
MA 2218 0
MD 2229 0
ME 340 0
MI 4014 0
MN 2374 0
MO 2882 0
MS 817 0
MT 226 0
NC 3435 0
ND 285 0
NE 807 0
NH 412 0
NJ 3052 0
NM 399 0
NV 729 0
NY 5747 0
OH 5487 0
OK 1421 0
OR 1186 0
PA 4307 0
RI 306 0
SC 1228 0
SD 376 0
TN 2488 0
TX 10192 0
UT 1125 0
VA 3220 0
VT 135 0
WA 1956 0
WI 2367 0
WV 813 0
WY 142 0

Question 3

Code
# Per-state totals of babies named "Allison" whose recorded sex at birth
# is "F"; the result is returned ungrouped.
allison_females <- a_names |>
  filter(Name == "Allison") |>
  filter(Sex_at_Birth == "F") |>
  group_by(Sex_at_Birth, State) |>
  summarise(sex_count = sum(Count), .groups = "drop")

Question 4

Code
# Yearly total of babies named "Allison", summed over every state and sex.
allison <- a_names |>
  filter(Name == "Allison") |>
  group_by(Year) |>
  summarise(a_count = sum(Count), .groups = "drop")

# Connected scatterplot of the yearly totals. The y-axis label is left
# blank because the title already names the plotted quantity.
allison |>
  ggplot(aes(x = Year, y = a_count)) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year Observed",
    y = "",
    title = "Count of 'Allison'"
  ) +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(n.breaks = 10)

Code
# https://www.statology.org/ggplot-axis-ticks/

Question 5

Code
# Simple linear regression of the yearly "Allison" count on the year.
allison_lm <- lm(a_count ~ Year, data = allison)

# Print the fitted call and coefficients.
allison_lm

Call:
lm(formula = a_count ~ Year, data = allison)

Coefficients:
(Intercept)         Year  
   209815.1       -101.6  

Question 6

Code
# Scatterplot of the yearly "Allison" counts with the fitted linear trend.
# Year belongs on the x-axis and the count on the y-axis: that matches the
# "Year Observed" x label and makes stat_smooth() fit count as a function
# of year, i.e. the same a_count ~ Year relationship as the regression.
allison |>
  ggplot(aes(x = Year, y = a_count)) +
  geom_point() +
  stat_smooth(method = "lm") +
  labs(x = "Year Observed",
       y = "",
       title = "Count of 'Allison'")
`geom_smooth()` using formula 'y ~ x'

Estimated Regression Equation

$\hat{y} = 209815.1 - 101.6x$, where $\hat{y}$ is the predicted number of babies named "Allison" and $x$ is the year.

Question 7

Code
# Residuals of the linear model plotted against year. broom::augment()
# supplies the `.resid` column alongside the model's variables.
ggplot(
  data = broom::augment(allison_lm),
  mapping = aes(x = Year, y = .resid)
) +
  geom_point() +
  labs(
    x = "Year Observed",
    y = "",
    title = "Residuals"
  ) +
  scale_x_continuous(n.breaks = 10)

The only clear pattern I see is that, starting in 2011, the residuals begin to increase linearly.

What can you conclude from this model?

Based on this model, in the most recent years the number of children named “Allison” rose relative to what the model predicted, resulting in a positive linear pattern in the residuals. When this data ends in 2014, your name wasn’t as cool as it was in 1998 or 2009, but if this pattern has continued, it is possible that your name is becoming more and more cool again.

Question 8

Code
# Yearly totals for male babies named Allan, Alan or Allen, drawn as one
# connected, colour-coded series per spelling.
a_names |>
  filter(
    Sex_at_Birth == "M",
    Name %in% c("Allan", "Alan", "Allen")
  ) |>
  group_by(Name, Year) |>
  summarize(al_count = sum(Count), .groups = "drop") |>
  ggplot(aes(x = Year, y = al_count, color = Name)) +
  geom_point() +
  geom_line() +
  labs(
    x = "Year Observed",
    y = "",
    title = "Count of Name Variant"
  ) +
  scale_x_continuous(n.breaks = 10) +
  scale_y_continuous(n.breaks = 10)

Question 9

Code
# Counts of male babies named Alan, Allan or Allen born in 2000 in
# California and Pennsylvania: one row per state, one column per spelling.
als_PACA_2000 <- a_names |>
  filter(Name %in% c("Allan", "Alan", "Allen"),
         # Year is numeric, so compare against a number rather than relying
         # on implicit coercion to the string "2000".
         Year == 2000,
         State %in% c("PA", "CA"),
         Sex_at_Birth == "M") |>
  group_by(State, Name) |>
  summarize(al_counts = sum(Count), .groups = "drop") |>
  pivot_wider(names_from = Name,
              values_from = al_counts)

# Header labels are positional; the summary is sorted by State then Name,
# so the pivoted columns come out as Alan, Allan, Allen.
kable(als_PACA_2000,
      format = "pipe",
      col.names = c("State Observed",
                    "Count of 'Alan' Babies",
                    "Count of 'Allan' Babies",
                    "Count of 'Allen' Babies"),
      align = "c",
      caption = "Count of Babies Named 'Alan/Allan/Allen' Per State")
Count of Babies Named ‘Alan/Allan/Allen’ Per State
State Observed Count of ‘Alan’ Babies’ Count of ‘Allan’ Babies Count of ‘Allen’ Babies
CA 579 131 176
PA 51 12 56

Question 10

Code
# Express `row` as a percentage of `total`, rounded to two decimal places.
#
# row:   numeric value(s) forming the numerator.
# total: numeric value(s) forming the denominator.
#
# Returns a numeric vector of percentages. Stops with an error when either
# argument is not numeric.
convert_percent <- function(row, total) {
  stopifnot(
    is.numeric(row),
    is.numeric(total)
  )

  share <- row / total
  round(share * 100, 2)
}

# Share of each spelling (Alan / Allan / Allen) among male babies given one
# of those names in 2000, per state (California and Pennsylvania). Each
# row's three percentages are taken relative to that state's combined total.
al_percent <- a_names |>
  filter(Name %in% c("Allan", "Alan", "Allen"),
         # Year is numeric, so compare against a number rather than relying
         # on implicit coercion to the string "2000".
         Year == 2000,
         State %in% c("PA", "CA"),
         Sex_at_Birth == "M") |>
  group_by(State, Name) |>
  summarize(al_count = sum(Count), .groups = "drop") |>
  pivot_wider(names_from = Name,
              values_from = al_count) |>
  # Row-wise so that the total is computed separately for each state.
  rowwise() |>
  mutate(total_count = sum(c_across(Alan:Allen)),
         Alan = convert_percent(Alan, total_count),
         Allan = convert_percent(Allan, total_count),
         Allen = convert_percent(Allen, total_count)) |>
  # Drop the row-wise grouping and the helper column so later steps receive
  # a plain tibble holding only the state and the three percentages.
  ungroup() |>
  select(-total_count)

kable(al_percent,
      format = "pipe",
      col.names = c("State Observed",
                    "Percent of 'Alan' Babies",
                    "Percent of 'Allan' Babies",
                    "Percent of 'Allen' Babies"),
      align = "c",
      caption = "Percent of Babies Named 'Alan/Allan/Allen' Per State")
Percent of Babies Named ‘Alan/Allan/Allen’ Per State
State Observed Percent of ‘Alan’ Babies’ Percent of ‘Allan’ Babies Percent of ‘Allen’ Babies
CA 65.35 14.79 19.86
PA 42.86 10.08 47.06

Question 11

Code
# Styled HTML version of the percentage table: striped, hoverable,
# condensed and bordered, left-aligned on the page, set in Times New Roman.
# The "Alan" header no longer carries a stray trailing apostrophe.
al_percent |>
  kbl(col.names = c("State Observed",
                    "Percent of 'Alan' Babies",
                    "Percent of 'Allan' Babies",
                    "Percent of 'Allen' Babies"),
      caption = "Percent of Babies Named 'Alan/Allan/Allen' Per State",
      align = "c") |>
  kable_styling(bootstrap_options = c("striped",
                                      "hover",
                                      "condensed",
                                      "bordered"),
                position = "left",
                html_font = "Times New Roman")
Percent of Babies Named 'Alan/Allan/Allen' Per State
State Observed Percent of 'Alan' Babies' Percent of 'Allan' Babies Percent of 'Allen' Babies
CA 65.35 14.79 19.86
PA 42.86 10.08 47.06